/*
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loops unrolling and removing MOVW insn from loops
*/
#include "config.h"
#include "mangle.h"
#include "mpg123.h"

/*
 * 64-bit lane masks for the pand instructions in synth_1to1_MMX(); they are
 * referenced by symbol name from the inline asm through MANGLE().
 *   null_one - keeps the low 16-bit word of each 32-bit half (applied to the
 *              freshly computed samples in %mm0).
 *   one_null - keeps the high 16-bit word of each 32-bit half (applied to the
 *              quadword read back from the output buffer).
 */
static const unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
static const unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;
/**
 * Table of 31 32-bit constants, 16-byte aligned, with external linkage.
 *
 * Nothing in this file reads it, so it is presumably consumed by the MMX
 * dct64 code elsewhere -- TODO confirm against the caller.
 * NOTE(review): the words look like IEEE-754 single-precision bit patterns
 * (entry 0 is 0x3F002785, about 0.5006) -- confirm before relying on that.
 *
 * The rows below follow the monotonically increasing runs visible in the
 * data: 16, 8, 4, 2 and 1 entries.
 */
const unsigned int __attribute__((aligned(16))) costab_mmx[] =
{
    /* entries 0..15 */
    1056974725, 1057056395, 1057223771, 1057485416,
    1057855544, 1058356026, 1059019886, 1059897405,
    1061067246, 1062657950, 1064892987, 1066774581,
    1069414683, 1073984175, 1079645762, 1092815430,
    /* entries 16..23 */
    1057005197, 1057342072, 1058087743, 1059427869,
    1061799040, 1065862217, 1071413542, 1084439708,
    /* entries 24..27 */
    1057128951, 1058664893, 1063675095, 1076102863,
    /* entries 28..29 */
    1057655764, 1067924853,
    /* entry 30 */
    1060439283,
};

/**
  This array of magic numbers was calculated by the pure function
  make_decode_tables_MMX(32768), which had been implemented in tabinit_MMX.c
  (deleted since r23383).

  Layout: 1024 shorts arranged as 32 rows of 32 shorts. Each row is one group
  of 16 values written twice back to back. synth_1to1_MMX() starts reading at
  offset (16 - bo1) shorts into the first row, consumes 16 consecutive shorts
  per output sample and steps 32 shorts from one sample to the next; with the
  offset in the range 0..16, the duplication keeps every 16-short read inside
  a single row.
  */
static const short __attribute__((aligned(8))) mp3lib_decwins[] =
{
        0,      7,     54,    114,    510,   1288,   1644,   9372,
    18760,  -9373,   1644,  -1289,    510,   -115,     54,     -8,
        0,      7,     54,    114,    510,   1288,   1644,   9372,
    18760,  -9373,   1644,  -1289,    510,   -115,     54,     -8,
        0,      7,     55,    129,    500,   1379,   1490,   9834,
    18748,  -8910,   1784,  -1197,    516,   -101,     52,     -7,
        0,      7,     55,    129,    500,   1379,   1490,   9834,
    18748,  -8910,   1784,  -1197,    516,   -101,     52,     -7,
        0,      8,     56,    145,    488,   1469,   1322,  10294,
    18714,  -8448,   1910,  -1107,    520,    -87,     51,     -6,
        0,      8,     56,    145,    488,   1469,   1322,  10294,
    18714,  -8448,   1910,  -1107,    520,    -87,     51,     -6,
        0,      9,     57,    161,    474,   1559,   1141,  10751,
    18658,  -7987,   2023,  -1016,    522,    -74,     49,     -6,
        0,      9,     57,    161,    474,   1559,   1141,  10751,
    18658,  -7987,   2023,  -1016,    522,    -74,     49,     -6,
        0,     10,     57,    177,    456,   1647,    944,  11205,
    18579,  -7528,   2123,   -927,    522,    -61,     48,     -5,
        0,     10,     57,    177,    456,   1647,    944,  11205,
    18579,  -7528,   2123,   -927,    522,    -61,     48,     -5,
        0,     11,     57,    194,    435,   1733,    734,  11654,
    18477,  -7073,   2210,   -838,    519,    -50,     46,     -5,
        0,     11,     57,    194,    435,   1733,    734,  11654,
    18477,  -7073,   2210,   -838,    519,    -50,     46,     -5,
        0,     12,     57,    212,    411,   1817,    510,  12097,
    18354,  -6621,   2285,   -751,    515,    -39,     44,     -4,
        0,     12,     57,    212,    411,   1817,    510,  12097,
    18354,  -6621,   2285,   -751,    515,    -39,     44,     -4,
        0,     13,     57,    229,    384,   1899,    271,  12534,
    18209,  -6174,   2348,   -666,    508,    -28,     43,     -4,
        0,     13,     57,    229,    384,   1899,    271,  12534,
    18209,  -6174,   2348,   -666,    508,    -28,     43,     -4,
        0,     14,     56,    247,    354,   1977,     18,  12963,
    18043,  -5733,   2398,   -583,    501,    -18,     41,     -4,
        0,     14,     56,    247,    354,   1977,     18,  12963,
    18043,  -5733,   2398,   -583,    501,    -18,     41,     -4,
        0,     15,     56,    266,    320,   2052,   -249,  13383,
    17855,  -5298,   2438,   -502,    491,     -9,     39,     -3,
        0,     15,     56,    266,    320,   2052,   -249,  13383,
    17855,  -5298,   2438,   -502,    491,     -9,     39,     -3,
        0,     17,     54,    284,    283,   2122,   -530,  13794,
    17648,  -4870,   2466,   -423,    480,     -1,     37,     -3,
        0,     17,     54,    284,    283,   2122,   -530,  13794,
    17648,  -4870,   2466,   -423,    480,     -1,     37,     -3,
        0,     18,     52,    302,    243,   2188,   -825,  14194,
    17420,  -4450,   2484,   -347,    468,      7,     35,     -3,
        0,     18,     52,    302,    243,   2188,   -825,  14194,
    17420,  -4450,   2484,   -347,    468,      7,     35,     -3,
        0,     19,     50,    320,    199,   2249,  -1133,  14583,
    17173,  -4039,   2492,   -274,    455,     14,     33,     -2,
        0,     19,     50,    320,    199,   2249,  -1133,  14583,
    17173,  -4039,   2492,   -274,    455,     14,     33,     -2,
       -1,     21,     48,    339,    152,   2304,  -1454,  14959,
    16908,  -3637,   2490,   -204,    440,     20,     32,     -2,
       -1,     21,     48,    339,    152,   2304,  -1454,  14959,
    16908,  -3637,   2490,   -204,    440,     20,     32,     -2,
       -1,     22,     45,    357,    101,   2354,  -1788,  15322,
    16624,  -3245,   2479,   -137,    425,     26,     30,     -2,
       -1,     22,     45,    357,    101,   2354,  -1788,  15322,
    16624,  -3245,   2479,   -137,    425,     26,     30,     -2,
       -1,     24,     41,    374,     47,   2396,  -2135,  15671,
    16323,  -2864,   2460,    -72,    409,     31,     28,     -2,
       -1,     24,     41,    374,     47,   2396,  -2135,  15671,
    16323,  -2864,   2460,    -72,    409,     31,     28,     -2,
       -1,     26,     37,    391,    -11,   2431,  -2493,  16004,
    16005,  -2494,   2432,    -12,    392,     36,     26,     -2,
       -1,     26,     37,    391,    -11,   2431,  -2493,  16004,
    16005,  -2494,   2432,    -12,    392,     36,     26,     -2,
       -2,    -28,     31,   -409,    -72,  -2460,  -2864, -16323,
    15671,   2135,   2396,    -47,    374,    -41,     24,      1,
       -2,    -28,     31,   -409,    -72,  -2460,  -2864, -16323,
    15671,   2135,   2396,    -47,    374,    -41,     24,      1,
       -2,    -30,     26,   -425,   -137,  -2479,  -3245, -16624,
    15322,   1788,   2354,   -101,    357,    -45,     22,      1,
       -2,    -30,     26,   -425,   -137,  -2479,  -3245, -16624,
    15322,   1788,   2354,   -101,    357,    -45,     22,      1,
       -2,    -32,     20,   -440,   -204,  -2490,  -3637, -16908,
    14959,   1454,   2304,   -152,    339,    -48,     21,      1,
       -2,    -32,     20,   -440,   -204,  -2490,  -3637, -16908,
    14959,   1454,   2304,   -152,    339,    -48,     21,      1,
       -2,    -33,     14,   -455,   -274,  -2492,  -4039, -17173,
    14583,   1133,   2249,   -199,    320,    -50,     19,      0,
       -2,    -33,     14,   -455,   -274,  -2492,  -4039, -17173,
    14583,   1133,   2249,   -199,    320,    -50,     19,      0,
       -3,    -35,      7,   -468,   -347,  -2484,  -4450, -17420,
    14194,    825,   2188,   -243,    302,    -52,     18,      0,
       -3,    -35,      7,   -468,   -347,  -2484,  -4450, -17420,
    14194,    825,   2188,   -243,    302,    -52,     18,      0,
       -3,    -37,     -1,   -480,   -423,  -2466,  -4870, -17648,
    13794,    530,   2122,   -283,    284,    -54,     17,      0,
       -3,    -37,     -1,   -480,   -423,  -2466,  -4870, -17648,
    13794,    530,   2122,   -283,    284,    -54,     17,      0,
       -3,    -39,     -9,   -491,   -502,  -2438,  -5298, -17855,
    13383,    249,   2052,   -320,    266,    -56,     15,      0,
       -3,    -39,     -9,   -491,   -502,  -2438,  -5298, -17855,
    13383,    249,   2052,   -320,    266,    -56,     15,      0,
       -4,    -41,    -18,   -501,   -583,  -2398,  -5733, -18043,
    12963,    -18,   1977,   -354,    247,    -56,     14,      0,
       -4,    -41,    -18,   -501,   -583,  -2398,  -5733, -18043,
    12963,    -18,   1977,   -354,    247,    -56,     14,      0,
       -4,    -43,    -28,   -508,   -666,  -2348,  -6174, -18209,
    12534,   -271,   1899,   -384,    229,    -57,     13,      0,
       -4,    -43,    -28,   -508,   -666,  -2348,  -6174, -18209,
    12534,   -271,   1899,   -384,    229,    -57,     13,      0,
       -4,    -44,    -39,   -515,   -751,  -2285,  -6621, -18354,
    12097,   -510,   1817,   -411,    212,    -57,     12,      0,
       -4,    -44,    -39,   -515,   -751,  -2285,  -6621, -18354,
    12097,   -510,   1817,   -411,    212,    -57,     12,      0,
       -5,    -46,    -50,   -519,   -838,  -2210,  -7073, -18477,
    11654,   -734,   1733,   -435,    194,    -57,     11,      0,
       -5,    -46,    -50,   -519,   -838,  -2210,  -7073, -18477,
    11654,   -734,   1733,   -435,    194,    -57,     11,      0,
       -5,    -48,    -61,   -522,   -927,  -2123,  -7528, -18579,
    11205,   -944,   1647,   -456,    177,    -57,     10,      0,
       -5,    -48,    -61,   -522,   -927,  -2123,  -7528, -18579,
    11205,   -944,   1647,   -456,    177,    -57,     10,      0,
       -6,    -49,    -74,   -522,  -1016,  -2023,  -7987, -18658,
    10751,  -1141,   1559,   -474,    161,    -57,      9,      0,
       -6,    -49,    -74,   -522,  -1016,  -2023,  -7987, -18658,
    10751,  -1141,   1559,   -474,    161,    -57,      9,      0,
       -6,    -51,    -87,   -520,  -1107,  -1910,  -8448, -18714,
    10294,  -1322,   1469,   -488,    145,    -56,      8,      0,
       -6,    -51,    -87,   -520,  -1107,  -1910,  -8448, -18714,
    10294,  -1322,   1469,   -488,    145,    -56,      8,      0,
       -7,    -52,   -101,   -516,  -1197,  -1784,  -8910, -18748,
     9834,  -1490,   1379,   -500,    129,    -55,      7,      0,
       -7,    -52,   -101,   -516,  -1197,  -1784,  -8910, -18748,
     9834,  -1490,   1379,   -500,    129,    -55,      7,      0,
};

/**
 * Produce 32 16-bit output samples for one channel using MMX.
 *
 * The function calls dct64_MMX_func() to refill two slots of a per-channel
 * static buffer and then, for each output sample, forms the dot product of
 * 16 buffer shorts with 16 shorts of mp3lib_decwins, shifts the 32-bit sum
 * right by 13 and saturates it to 16 bits. Samples 0..16 are stored as
 * computed; samples 17..31 are negated (with saturation) before storing.
 *
 * State (buffs, bo) lives in function-static variables, so it persists
 * between calls and the function is not reentrant. The offset bo is only
 * advanced when channel == 0.
 * NOTE(review): this presumably requires channel 0 to be synthesized before
 * the other channel of the same frame -- confirm against the caller.
 *
 * @param bandPtr  forwarded unchanged to dct64_MMX_func() as its third
 *                 argument.
 * @param channel  0 selects buffs[1] and steps bo; any other value selects
 *                 buffs[0] and shifts the output position by one short.
 * @param samples  output buffer. New values are written at a stride of two
 *                 shorts (samples[0], samples[2], ... after the optional
 *                 channel shift); the shorts in between are read and written
 *                 back unchanged by the 64-bit stores. This is consistent
 *                 with an interleaved two-channel buffer -- TODO confirm.
 * @return always 0.
 */
int synth_1to1_MMX(real *bandPtr, int channel, short *samples)
{
    /* Two channels x two buffers x 0x110 (272) shorts each. */
    static short buffs[2][2][0x110] __attribute__((aligned(8)));
    /* Rotating offset in 0..15 shared by both channels. */
    static int bo = 1;
    short *b0, (*buf)[0x110], *a, *b;
    const short* window;
    /* i is the asm loop counter: 8 iterations of the first paired loop. */
    int bo1, i = 8;

    if (channel == 0) {
        /* Step the offset backwards modulo 16 once per channel-0 call. */
        bo = (bo - 1) & 0xf;
        buf = buffs[1];
    } else {
        /* Other channel: write to the odd-numbered output slots. */
        samples++;
        buf = buffs[0];
    }

    /*
     * Choose which of the two buffers is read by the windowing code (b0) and
     * where dct64_MMX_func() gets its two destination pointers; the roles of
     * buf[0] and buf[1] swap with the parity of bo. bo1 is bo rounded up to
     * an even value, so it lies in 0..16.
     */
    if (bo & 1) {
        b0 = buf[1];
        bo1 = bo + 1;
        a = buf[0] + bo;
        b = buf[1] + ((bo + 1) & 0xf);
    } else {
        b0 = buf[0];
        bo1 = bo;
        b = buf[0] + bo;
        a = buf[1] + ((bo + 1) & 0xf);
    }

    /* Defined outside this file; presumably fills a and b from bandPtr. */
    dct64_MMX_func(a, b, bandPtr);
    /* Start 16 - bo1 shorts into the first (duplicated) row of the table. */
    window = mp3lib_decwins + 16 - bo1;
    //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1);
    /*
     * Asm operands: %0 = i (counter), %1 = window, %2 = b0, %3 = samples.
     * NOTE(review): %mm0-%mm7 are used below but are not listed as clobbers.
     */
__asm__ volatile(
ASMALIGN(4)
/* Loop 0: 8 iterations, two output samples per iteration (samples 0..15). */
"0:\n\t"
        /* mm0..mm3: 16 window shorts at %1 times 16 buffer shorts at %2.
           mm4..mm7: the same for the next sample, window +64 bytes and
           buffer +32 bytes. */
        "movq  (%1),%%mm0\n\t"
        "movq  64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd 32(%2),%%mm4\n\t"
        "movq  8(%1),%%mm1\n\t"
        "movq  72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd 40(%2),%%mm5\n\t"
        "movq  16(%1),%%mm2\n\t"
        "movq  80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd 48(%2),%%mm6\n\t"
        "movq  24(%1),%%mm3\n\t"
        "movq  88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd 56(%2),%%mm7\n\t"
        /* Accumulate the partial sums into mm0 and mm4. */
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        /* Fold the high dword into the low dword: full 16-term sum. */
        "movq  %%mm0,%%mm1\n\t"
        "movq  %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        /* Scale down by 2^13 and saturate to signed 16 bit. */
        "psrad $13,%%mm0\n\t"
        "psrad $13,%%mm4\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "packssdw %%mm4,%%mm4\n\t"

        /* Merge the two new samples into words 0 and 2 of the output
           quadword while keeping words 1 and 3 as they were in memory. */
        "movq   (%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand   "MANGLE(one_null)", %%mm1\n\t"
        "pand   "MANGLE(null_one)", %%mm0\n\t"
        "por    %%mm0, %%mm1\n\t"
        "movq   %%mm1,(%3)\n\t"

        /* Advance: buffer +32 shorts, window +64 shorts, output +4 shorts. */
        "add $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"

        "decl %0\n\t"
        "jnz  0b\n\t"

        /* Sample 16: a single dot product, stored as one 16-bit word. */
        "movq  (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq  8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq  16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq  24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq  %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "psrad $13,%%mm0\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax, (%3)\n\t"
        /* From here the buffer is walked backwards: step it back 16 shorts,
           move the window on by 32 shorts and the output by 2 shorts. */
        "sub $32,%2\n\t"
        "add $64,%1\n\t"
        "add $4,%3\n\t"

        /* Loop 1: 7 iterations, two negated samples each (samples 17..30). */
        "movl $7,%0\n\t"
ASMALIGN(4)
"1:\n\t"
        /* Same dot products as loop 0, except that the second sample of the
           pair reads the buffer 32 bytes below %2 instead of above it. */
        "movq  (%1),%%mm0\n\t"
        "movq  64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd -32(%2),%%mm4\n\t"
        "movq  8(%1),%%mm1\n\t"
        "movq  72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd -24(%2),%%mm5\n\t"
        "movq  16(%1),%%mm2\n\t"
        "movq  80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd -16(%2),%%mm6\n\t"
        "movq  24(%1),%%mm3\n\t"
        "movq  88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd -8(%2),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        /* Here the folded sums end up in mm1 and mm5 ... */
        "movq  %%mm0,%%mm1\n\t"
        "movq  %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "paddd %%mm4,%%mm5\n\t"
        "psrad $13,%%mm1\n\t"
        "psrad $13,%%mm5\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "packssdw %%mm5,%%mm5\n\t"
        /* ... so that mm0 and mm4 can be zeroed and used to negate them
           with signed saturation (0 - value). */
        "psubd %%mm0,%%mm0\n\t"
        "psubd %%mm4,%%mm4\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "psubsw %%mm5,%%mm4\n\t"

        /* Masked read-modify-write of the output, as in loop 0. */
        "movq   (%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand   "MANGLE(one_null)", %%mm1\n\t"
        "pand   "MANGLE(null_one)", %%mm0\n\t"
        "por    %%mm0, %%mm1\n\t"
        "movq   %%mm1,(%3)\n\t"

        /* Advance: buffer -32 shorts, window +64 shorts, output +4 shorts. */
        "sub $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"
        "decl %0\n\t"
        "jnz  1b\n\t"

        /* Sample 31: a single negated dot product, stored as one word. */
        "movq  (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq  8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq  16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq  24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq  %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "psrad $13,%%mm1\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "psubd %%mm0,%%mm0\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax,(%3)\n\t"
        /* Leave MMX state so that x87 floating point can be used again. */
        "emms\n\t"
        :"+r"(i), "+r"(window), "+r"(b0), "+r"(samples)
        :
        :"memory", "%eax");
    return 0;
}
